package it.uniroma2.art.uima.imdb;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.HTMLElementName;
import net.htmlparser.jericho.Source;
import net.htmlparser.jericho.StartTag;
import net.htmlparser.jericho.TextExtractor;

import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.EmptyFSList;
import org.apache.uima.jcas.cas.FSList;
import org.apache.uima.jcas.cas.NonEmptyFSList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * UIMA annotator that reads the CAS document text as the HTML of an IMDb page and adds IMDb
 * annotations to the CAS.
 * <p>
 * Three page layouts are distinguished by inspecting the first {@code <h1>} element: a film page, a
 * TV-series page and a "Full cast" page. The corresponding {@code annotate*} method creates the
 * main annotation ({@link IMDBFilm}, {@link IMDBTVSeries} or {@link IMDBFilmCast}) together with
 * the person annotations it refers to.
 */
public class IMDBAnnotator extends JCasAnnotator_ImplBase {
	/** Prefix prepended to links that are not already absolute. */
	private static final String IMDB_URL = "http://www.imdb.com";

	private static final Logger logger = LoggerFactory.getLogger(IMDBAnnotator.class);

	/**
	 * Parses the document text as HTML, detects the page layout and delegates to the matching
	 * {@code annotate*} method. Documents without text or without an {@code <h1>} element are
	 * skipped with a warning.
	 * 
	 * @param jcas
	 *            the CAS holding the HTML page as document text
	 * @throws AnalysisEngineProcessException
	 *             if reading the document text fails
	 */
	@Override
	public void process(JCas jcas) throws AnalysisEngineProcessException {
		logger.debug("inizio process");
		String docText = jcas.getDocumentText();
		if (docText == null) {
			logger.warn("the CAS has no document text, nothing to annotate");
			return;
		}

		Source source;
		try {
			logger.debug("pre process input file");
			source = new Source(new StringReader(docText));
		} catch (IOException e) {
			// report the failure to the framework instead of swallowing it
			throw new AnalysisEngineProcessException(e);
		}
		source.fullSequentialParse();
		logger.debug("post parsed input file");

		List<Element> h1ElementsList = source.getAllElements(HTMLElementName.H1);
		if (h1ElementsList.isEmpty()) {
			logger.warn("no <h1> element found, the document is not annotated");
			return;
		}
		Element h1Element = h1ElementsList.get(0);

		// first check if it is a "first page" (movie page) or a "second page" (full cast list page)
		if (firstDescendantTextContains(h1Element, HTMLElementName.SMALL, "Full cast")) {
			annotateFullCast(jcas, source);
		} else if (firstDescendantTextContains(h1Element, HTMLElementName.SPAN, "TV Series")) {
			annotateSeries(jcas, source);
		} else {
			annotateFilm(jcas, source);
		}
	}

	/**
	 * Creates an {@link IMDBFilm} annotation spanning the first {@code <h1>} element and fills in
	 * id, title, year, description, score and stars.
	 * 
	 * @param jcas
	 *            the CAS the annotations are created in
	 * @param source
	 *            the parsed page; must contain an {@code <h1>} with at least one {@code <span>}
	 */
	private void annotateFilm(JCas jcas, Source source) {
		logger.debug("it is a film");
		Element h1Element = source.getAllElements(HTMLElementName.H1).get(0);
		IMDBFilm imdbFilm = new IMDBFilm(jcas);
		imdbFilm.setBegin(h1Element.getBegin());
		imdbFilm.setEnd(h1Element.getEnd());

		// the title is the text directly inside <h1>, unless there are several spans and the
		// last one contains an <i> element: then the text directly inside that span is used
		List<Element> titleSpans = h1Element.getAllElements(HTMLElementName.SPAN);
		String movieTitle = directText(h1Element);
		if (titleSpans.size() > 1) {
			Element lastSpan = titleSpans.get(titleSpans.size() - 1);
			if (!lastSpan.getAllElements(HTMLElementName.I).isEmpty()) {
				movieTitle = directText(lastSpan);
			}
		}

		String id = getIdFromNameAndSite(movieTitle, findCanonicalUrl(source));
		logger.debug("id = {}", id);
		imdbFilm.setMovieId(id);
		logger.debug("movieTitle = {}", movieTitle);
		imdbFilm.setTitle(movieTitle);

		int year = parseFilmYear(titleSpans);
		logger.debug("year = {}", year);
		imdbFilm.setYear(year);

		Element firstRow = firstOrNull(source.getAllElements(HTMLElementName.TR));

		// get and add the description
		String description = extractDescription(firstRow);
		logger.debug("description = {}", description);
		imdbFilm.setDescription(description);

		// get the score and the star(s)
		double rate = 0.0;
		List<IMDBStar> starsList = new ArrayList<IMDBStar>();
		for (Element divElement : descendants(firstRow, HTMLElementName.DIV)) {
			String cssClass = divElement.getAttributeValue("class");
			if ("star-box".equals(cssClass)) {
				rate = extractRating(divElement, rate);
			} else if ("txt-block".equals(cssClass)) {
				if (firstHeadingText(divElement).contains("Stars")) {
					starsList.addAll(collectStars(divElement, jcas));
				}
			}
		}

		// set the rate
		logger.debug("rate = {}", rate);
		imdbFilm.setImdbScore(rate);

		// add the stars List to the imdbFilm
		logPersons("Stars", starsList);
		imdbFilm.setStarsList(toFsList(jcas, starsList));

		imdbFilm.addToIndexes();
	}

	/**
	 * Creates an {@link IMDBTVSeries} annotation spanning the first {@code <h1>} element and fills
	 * in id, title, start year, optional end year, description, score, creators and stars.
	 * 
	 * @param jcas
	 *            the CAS the annotations are created in
	 * @param source
	 *            the parsed page; must contain an {@code <h1>} with at least one {@code <span>}
	 */
	private void annotateSeries(JCas jcas, Source source) {
		logger.debug("it is a TVseries");
		Element h1Element = source.getAllElements(HTMLElementName.H1).get(0);
		IMDBTVSeries imdbTVSeries = new IMDBTVSeries(jcas);
		imdbTVSeries.setBegin(h1Element.getBegin());
		imdbTVSeries.setEnd(h1Element.getEnd());

		// with a single span the title is the text directly inside <h1>, otherwise it is the text
		// directly inside the second span
		List<Element> titleSpans = h1Element.getAllElements(HTMLElementName.SPAN);
		String seriesTitle;
		if (titleSpans.size() == 1) {
			seriesTitle = directText(h1Element);
		} else {
			seriesTitle = directText(titleSpans.get(1));
		}
		seriesTitle = stripSurroundingQuotes(seriesTitle);

		String id = getIdFromNameAndSite(seriesTitle, findCanonicalUrl(source));
		logger.debug("id = {}", id);
		imdbTVSeries.setMovieId(id);
		logger.debug("seriesTitle = {}", seriesTitle);
		imdbTVSeries.setTitle(seriesTitle);

		// then take the start and the end year (if present): the third space-separated token of
		// the first span, without its last character, starts with the four digits of the start
		// year; when it is longer than six characters the end year starts at index 5
		String[] headerTokens = titleSpans.get(0).getTextExtractor().toString().trim().split(" ");
		String yearsString = headerTokens[2].substring(0, headerTokens[2].length() - 1);
		int yearBegin = Integer.parseInt(yearsString.substring(0, 4));
		logger.debug("year begin = {}", yearBegin);
		imdbTVSeries.setYear(yearBegin);
		if (yearsString.length() > 6) {
			String yearEndString = yearsString.substring(5);
			if (!yearEndString.isEmpty()) {
				int yearEnd = Integer.parseInt(yearEndString);
				logger.debug("year end = {}", yearEnd);
				imdbTVSeries.setEndYear(yearEnd);
			}
		}

		Element firstRow = firstOrNull(source.getAllElements(HTMLElementName.TR));

		// take the description
		String description = extractDescription(firstRow);
		logger.debug("description = {}", description);
		imdbTVSeries.setDescription(description);

		// get the score, the creator(s) and the stars
		double rate = 0.0;
		List<IMDBCreator> creatorsList = new ArrayList<IMDBCreator>();
		List<IMDBStar> starsList = new ArrayList<IMDBStar>();
		for (Element divElement : descendants(firstRow, HTMLElementName.DIV)) {
			String cssClass = divElement.getAttributeValue("class");
			if ("star-box".equals(cssClass)) {
				rate = extractRating(divElement, rate);
			} else if ("txt-block".equals(cssClass)) {
				String heading = firstHeadingText(divElement);
				if (heading.contains("Creator")) {
					creatorsList.addAll(collectCreators(divElement, jcas));
				} else if (heading.contains("Stars")) {
					starsList.addAll(collectStars(divElement, jcas));
				}
			}
		}

		// set the rate
		logger.debug("rate = {}", rate);
		imdbTVSeries.setImdbScore(rate);

		// add the creators and the stars List to the imdbTVSeries
		logPersons("Creator(s)", creatorsList);
		imdbTVSeries.setCreatorsList(toFsList(jcas, creatorsList));

		logPersons("Stars", starsList);
		imdbTVSeries.setStarsList(toFsList(jcas, starsList));

		imdbTVSeries.addToIndexes();
	}

	/**
	 * Creates an {@link IMDBFilmCast} annotation for a "Full cast" page together with the director,
	 * writer and actor annotations found on it.
	 * <p>
	 * Directors and writers are read from the second and third {@code <table>} inside the
	 * {@code <div id="tn15content">}; actors are read from the first {@code <table class="cast">}.
	 * The span of the main annotation is only set when such a cast table exists.
	 * 
	 * @param jcas
	 *            the CAS the annotations are created in
	 * @param source
	 *            the parsed page; its first {@code <h1>} must contain an {@code <a>} element
	 */
	private void annotateFullCast(JCas jcas, Source source) {
		logger.debug("it is a full cast");
		IMDBFilmCast fullCast = new IMDBFilmCast(jcas);

		Element h1Element = source.getAllElements(HTMLElementName.H1).get(0);
		Element titleLink = h1Element.getAllElements(HTMLElementName.A).get(0);

		// without an <i> element in <h1> the title is the link text, otherwise it is the text
		// directly inside the third span
		String movieTitle;
		int startAll;
		if (h1Element.getAllElements(HTMLElementName.I).isEmpty()) {
			startAll = titleLink.getBegin();
			movieTitle = titleLink.getTextExtractor().toString();
		} else {
			Element titleSpan = h1Element.getAllElements(HTMLElementName.SPAN).get(2);
			startAll = titleSpan.getBegin();
			movieTitle = directText(titleSpan);
		}
		movieTitle = stripSurroundingQuotes(movieTitle);

		String titleHref = titleLink.getAttributeValue("href");
		String movieSite = titleHref == null ? IMDB_URL : toAbsoluteUrl(titleHref);

		// get all the directors and writers (name + urlPage) and add all to the two lists
		List<IMDBPerson> directorsList = new ArrayList<IMDBPerson>();
		List<IMDBPerson> writersList = new ArrayList<IMDBPerson>();
		Element contentDiv = findDivById(source, "tn15content");
		if (contentDiv != null) {
			List<Element> tables = contentDiv.getAllElements(HTMLElementName.TABLE);
			if (tables.size() > 1) {
				for (Element anchor : creditAnchors(tables.get(1))) {
					IMDBDirector imdbDirector = new IMDBDirector(jcas);
					populatePerson(imdbDirector, anchor, anchor.getBegin(), anchor.getEnd(), jcas);
					imdbDirector.addToIndexes();
					directorsList.add(imdbDirector);
				}
			}
			if (tables.size() > 2) {
				for (Element anchor : creditAnchors(tables.get(2))) {
					IMDBWriter imdbWriter = new IMDBWriter(jcas);
					populatePerson(imdbWriter, anchor, anchor.getBegin(), anchor.getEnd(), jcas);
					imdbWriter.addToIndexes();
					writersList.add(imdbWriter);
				}
			}
		}

		// get all the actors (name + urlPage) and add all to the list
		List<IMDBPerson> castList = new ArrayList<IMDBPerson>();
		for (Element tableElement : source.getAllElements(HTMLElementName.TABLE)) {
			if (!"cast".equals(tableElement.getAttributeValue("class"))) {
				continue;
			}
			for (Element trElement : tableElement.getAllElements(HTMLElementName.TR)) {
				List<Element> cells = trElement.getAllElements(HTMLElementName.TD);
				if (cells.size() < 2) {
					continue; // the actor link is looked up in the second cell
				}
				List<Element> links = cells.get(1).getAllElements(HTMLElementName.A);
				if (links.isEmpty() || links.get(0).getAttributeValue("href") == null) {
					continue;
				}
				Element anchor = links.get(0);
				IMDBActor imdbActor = new IMDBActor(jcas);
				populatePerson(imdbActor, anchor, anchor.getBegin(), anchor.getEnd(), jcas);
				imdbActor.addToIndexes();
				castList.add(imdbActor);
			}
			fullCast.setBegin(startAll);
			fullCast.setEnd(tableElement.getEnd());
			break; // only the first cast table is used
		}

		// add all the annotation to the main annotation
		String id = getIdFromNameAndSite(movieTitle, movieSite);
		logger.debug("movieId = {}", id);
		fullCast.setMovieId(id);
		logger.debug("movieSite = {}", movieSite);
		IMDBSite imdbMovieSite = new IMDBSite(jcas);
		imdbMovieSite.setSite(movieSite);
		fullCast.setMovieSite(imdbMovieSite);

		logPersons("Director", directorsList);
		fullCast.setDirectorsList(toFsList(jcas, directorsList));

		logPersons("Writer", writersList);
		fullCast.setWritersList(toFsList(jcas, writersList));

		logPersons("Cast", castList);
		FSList actorFSList = toFsList(jcas, castList);
		if (logger.isDebugEnabled()) {
			logger.debug("Cast number = " + getListSize(actorFSList));
		}
		fullCast.setActorList(actorFSList);

		fullCast.addToIndexes();
	}

	/**
	 * Creates one {@link IMDBPerson} for every {@code <a>} element with an {@code href} inside
	 * {@code divNode} and appends it to {@code personList}. The persons are not added to the CAS
	 * indexes.
	 * 
	 * @param divNode
	 *            the element whose links are read
	 * @param personList
	 *            the list the created persons are appended to
	 * @param jcas
	 *            the CAS the persons are created in
	 */
	private void addPersonToList(Element divNode, List<IMDBPerson> personList, JCas jcas) {
		for (Element aElement : divNode.getAllElements(HTMLElementName.A)) {
			if (aElement.getAttributeValue("href") == null) {
				continue; // an anchor without a target cannot identify a person
			}
			IMDBPerson imdbPerson = new IMDBPerson(jcas);
			// NOTE(review): the span is the whole enclosing element, while the full cast page uses
			// the span of the link itself - confirm that this difference is intended
			populatePerson(imdbPerson, aElement, divNode.getBegin(), divNode.getEnd(), jcas);
			personList.add(imdbPerson);
		}
	}

	/**
	 * Counts the {@link NonEmptyFSList} nodes of a list, stopping at the first tail that is not a
	 * {@link NonEmptyFSList}.
	 * 
	 * @param fsList
	 *            the head of the list
	 * @return the number of elements in the list
	 */
	private int getListSize(FSList fsList) {
		int size = 0;
		FSList cursor = fsList;
		while (cursor instanceof NonEmptyFSList) {
			size++;
			cursor = ((NonEmptyFSList) cursor).getTail();
		}
		return size;
	}

	/**
	 * Tells whether {@code number} can be parsed by {@link Integer#parseInt(String)}.
	 * 
	 * @param number
	 *            the text to check
	 * @return {@code true} if the text is a valid {@code int}
	 */
	private boolean isInteger(String number) {
		try {
			Integer.parseInt(number);
			return true;
		} catch (NumberFormatException e) {
			return false;
		}
	}

	/**
	 * Builds an identifier from a name and a URL: the name, {@code " - "} and the (up to) five
	 * characters of {@code site} that precede its last character, with every colon replaced by a
	 * space. A site shorter than six characters contributes whatever characters are available
	 * instead of raising an exception.
	 * 
	 * @param name
	 *            the title or person name
	 * @param site
	 *            the URL of the page, may be empty but not {@code null}
	 * @return the identifier
	 */
	private String getIdFromNameAndSite(String name, String site) {
		int end = Math.max(site.length() - 1, 0);
		int start = Math.max(site.length() - 6, 0);
		return (name + " - " + site.substring(start, end)).replace(":", " ");
	}

	/**
	 * Returns the text placed directly inside {@code element}, excluding the content of every
	 * nested element.
	 */
	private static String directText(final Element element) {
		TextExtractor extractor = new TextExtractor(element) {
			@Override
			public boolean excludeElement(StartTag startTag) {
				return element.getStartTag() != startTag;
			}
		};
		return extractor.toString();
	}

	/**
	 * Tells whether the text of the first {@code tagName} element inside {@code parent} contains
	 * {@code marker}; {@code false} when there is no such element.
	 */
	private static boolean firstDescendantTextContains(Element parent, String tagName, String marker) {
		List<Element> found = parent.getAllElements(tagName);
		return !found.isEmpty() && found.get(0).getTextExtractor().toString().contains(marker);
	}

	/** Returns the first element of the list, or {@code null} when the list is empty. */
	private static Element firstOrNull(List<Element> elements) {
		return elements.isEmpty() ? null : elements.get(0);
	}

	/**
	 * Returns all {@code tagName} elements inside {@code parent}, or an empty list when
	 * {@code parent} is {@code null}.
	 */
	private static List<Element> descendants(Element parent, String tagName) {
		if (parent == null) {
			return new ArrayList<Element>();
		}
		return parent.getAllElements(tagName);
	}

	/**
	 * Returns the {@code href} of the last {@code <link rel="canonical">} element of the page, or
	 * an empty string when there is none.
	 */
	private static String findCanonicalUrl(Source source) {
		String site = "";
		for (Element linkElement : source.getAllElements(HTMLElementName.LINK)) {
			// constant-first equals: <link> elements without a rel attribute are simply skipped
			if ("canonical".equals(linkElement.getAttributeValue("rel"))) {
				String href = linkElement.getAttributeValue("href");
				if (href != null) {
					site = href;
				}
			}
		}
		return site;
	}

	/**
	 * Returns the text of the second {@code <p>} element inside {@code row}, or an empty string
	 * when {@code row} is {@code null} or has fewer than two paragraphs.
	 */
	private static String extractDescription(Element row) {
		List<Element> paragraphs = descendants(row, HTMLElementName.P);
		if (paragraphs.size() > 1) {
			return paragraphs.get(1).getTextExtractor().toString();
		}
		return "";
	}

	/**
	 * Returns the text of the first {@code <h4>} element inside {@code block}, or an empty string
	 * when there is none.
	 */
	private static String firstHeadingText(Element block) {
		List<Element> headings = block.getAllElements(HTMLElementName.H4);
		return headings.isEmpty() ? "" : headings.get(0).getTextExtractor().toString();
	}

	/**
	 * Reads the score from the {@code <span class="rating-rating">} elements inside
	 * {@code starBoxDiv}; the last parsable one wins.
	 * 
	 * @param starBoxDiv
	 *            the element to search
	 * @param currentRate
	 *            the value returned when no score can be read
	 * @return the score
	 */
	private static double extractRating(Element starBoxDiv, double currentRate) {
		double rate = currentRate;
		for (Element spanElement : starBoxDiv.getAllElements(HTMLElementName.SPAN)) {
			if ("rating-rating".equals(spanElement.getAttributeValue("class"))) {
				rate = parseRating(spanElement.getTextExtractor().toString(), rate);
			}
		}
		return rate;
	}

	/**
	 * Parses the number in front of the first {@code '/'} of {@code ratingText} (so that both
	 * "8.7/10" and "10/10" are understood). Without a slash the first three characters are parsed.
	 * 
	 * @param ratingText
	 *            the text of the rating element
	 * @param fallback
	 *            the value returned when the text is not a number
	 * @return the parsed score or {@code fallback}
	 */
	private static double parseRating(String ratingText, double fallback) {
		String text = ratingText.trim();
		int slash = text.indexOf('/');
		String number;
		if (slash >= 0) {
			number = text.substring(0, slash).trim();
		} else if (text.length() > 3) {
			number = text.substring(0, 3);
		} else {
			number = text;
		}
		try {
			return Double.parseDouble(number);
		} catch (NumberFormatException e) {
			logger.warn("cannot read a score from '{}'", ratingText);
			return fallback;
		}
	}

	/**
	 * Extracts the year of a film from the spans of its {@code <h1>} element.
	 * <p>
	 * The first span text is taken without its first and last character. If that is not a number
	 * and contains spaces, its last space-separated token is used; otherwise the second span is
	 * read the same way and, if still not a number, its second token is used.
	 * 
	 * @param titleSpans
	 *            the {@code <span>} elements of the title, at least one
	 * @return the year
	 * @throws NumberFormatException
	 *             if the selected text is not a number
	 */
	private int parseFilmYear(List<Element> titleSpans) {
		String yearString = stripEnclosing(titleSpans.get(0).getTextExtractor().toString());
		if (!isInteger(yearString)) {
			String[] tokens = yearString.split(" ");
			if (tokens.length > 1) {
				yearString = tokens[tokens.length - 1];
			} else { // strange case, see for example the movie "Scream"
				yearString = stripEnclosing(titleSpans.get(1).getTextExtractor().toString());
				if (!isInteger(yearString)) {
					yearString = yearString.split(" ")[1];
				}
			}
		}
		return Integer.parseInt(yearString);
	}

	/** Returns {@code text} without its first and last character. */
	private static String stripEnclosing(String text) {
		return text.substring(1, text.length() - 1);
	}

	/**
	 * If {@code title} starts with a double quote, returns it without its first and last
	 * character (an empty string when it is only the quote); otherwise returns it unchanged.
	 */
	private static String stripSurroundingQuotes(String title) {
		if (!title.startsWith("\"")) {
			return title;
		}
		if (title.length() < 2) {
			return "";
		}
		return title.substring(1, title.length() - 1);
	}

	/**
	 * Returns {@code href} unchanged when it starts with {@code http://} or {@code https://},
	 * otherwise prefixed with the IMDb base URL.
	 */
	private static String toAbsoluteUrl(String href) {
		if (href.startsWith("http://") || href.startsWith("https://")) {
			return href;
		}
		return IMDB_URL + href;
	}

	/**
	 * Returns the first {@code <div>} whose trimmed {@code id} attribute equals {@code id}, or
	 * {@code null} when there is none.
	 */
	private static Element findDivById(Source source, String id) {
		for (Element divElement : source.getAllElements(HTMLElementName.DIV)) {
			String divId = divElement.getAttributeValue("id");
			if (divId != null && id.equals(divId.trim())) {
				return divElement;
			}
		}
		return null;
	}

	/**
	 * Collects, for every row of {@code table}, the first link of the first {@code <td>} cell.
	 * Rows without cell or link, links whose class is {@code glossary} and links without
	 * {@code href} are skipped.
	 */
	private static List<Element> creditAnchors(Element table) {
		List<Element> anchors = new ArrayList<Element>();
		for (Element trElement : table.getAllElements(HTMLElementName.TR)) {
			List<Element> cells = trElement.getAllElements(HTMLElementName.TD);
			if (cells.isEmpty()) {
				continue;
			}
			List<Element> links = cells.get(0).getAllElements(HTMLElementName.A);
			if (links.isEmpty()) {
				continue;
			}
			Element anchor = links.get(0);
			String cssClass = anchor.getAttributeValue("class");
			if (cssClass != null && "glossary".equals(cssClass.trim())) {
				continue;
			}
			if (anchor.getAttributeValue("href") == null) {
				continue;
			}
			anchors.add(anchor);
		}
		return anchors;
	}

	/**
	 * Fills {@code person} with the name (link text), site (link target), derived id and the given
	 * span. The person is not added to the CAS indexes.
	 * 
	 * @param person
	 *            the annotation to fill
	 * @param anchor
	 *            the link describing the person; must have an {@code href} attribute
	 * @param begin
	 *            the begin offset of the annotation
	 * @param end
	 *            the end offset of the annotation
	 * @param jcas
	 *            the CAS the site feature structure is created in
	 */
	private void populatePerson(IMDBPerson person, Element anchor, int begin, int end, JCas jcas) {
		String name = anchor.getTextExtractor().toString();
		String siteUrl = toAbsoluteUrl(anchor.getAttributeValue("href"));
		IMDBSite imdbSite = new IMDBSite(jcas);
		imdbSite.setSite(siteUrl);
		person.setBegin(begin);
		person.setEnd(end);
		person.setPersonId(getIdFromNameAndSite(name, siteUrl));
		person.setName(name);
		person.setImdbSite(imdbSite);
	}

	/** Copies span, id, name and site from one person annotation to another. */
	private static void copyPerson(IMDBPerson from, IMDBPerson to) {
		to.setBegin(from.getBegin());
		to.setEnd(from.getEnd());
		to.setPersonId(from.getPersonId());
		to.setName(from.getName());
		to.setImdbSite(from.getImdbSite());
	}

	/**
	 * Creates and indexes one {@link IMDBStar} for every link inside {@code txtBlockDiv}. A fresh
	 * person list is used on every call so that persons of other blocks are never repeated.
	 */
	private List<IMDBStar> collectStars(Element txtBlockDiv, JCas jcas) {
		List<IMDBPerson> persons = new ArrayList<IMDBPerson>();
		addPersonToList(txtBlockDiv, persons, jcas);
		List<IMDBStar> stars = new ArrayList<IMDBStar>(persons.size());
		for (IMDBPerson person : persons) {
			IMDBStar imdbStar = new IMDBStar(jcas);
			copyPerson(person, imdbStar);
			imdbStar.addToIndexes();
			stars.add(imdbStar);
		}
		return stars;
	}

	/** Creates and indexes one {@link IMDBCreator} for every link inside {@code txtBlockDiv}. */
	private List<IMDBCreator> collectCreators(Element txtBlockDiv, JCas jcas) {
		List<IMDBPerson> persons = new ArrayList<IMDBPerson>();
		addPersonToList(txtBlockDiv, persons, jcas);
		List<IMDBCreator> creators = new ArrayList<IMDBCreator>(persons.size());
		for (IMDBPerson person : persons) {
			IMDBCreator imdbCreator = new IMDBCreator(jcas);
			copyPerson(person, imdbCreator);
			imdbCreator.addToIndexes();
			creators.add(imdbCreator);
		}
		return creators;
	}

	/** Writes the heading and one line per person (id, name, site) to the debug log. */
	private static void logPersons(String heading, List<? extends IMDBPerson> persons) {
		if (!logger.isDebugEnabled()) {
			return;
		}
		logger.debug(heading);
		for (IMDBPerson imdbPerson : persons) {
			logger.debug("\t" + imdbPerson.getPersonId() + "\t" + imdbPerson.getName() + "\t"
					+ imdbPerson.getImdbSite().getSite());
		}
	}

	/**
	 * Builds an {@link FSList} terminated by an {@link EmptyFSList}. Every person is prepended, so
	 * the resulting list holds the persons in the reverse order of {@code persons}.
	 * 
	 * @param jcas
	 *            the CAS the list nodes are created in
	 * @param persons
	 *            the elements of the list
	 * @return the head of the list, an {@link EmptyFSList} when {@code persons} is empty
	 */
	private static FSList toFsList(JCas jcas, List<? extends IMDBPerson> persons) {
		FSList list = new EmptyFSList(jcas);
		for (IMDBPerson imdbPerson : persons) {
			NonEmptyFSList node = new NonEmptyFSList(jcas);
			node.setHead(imdbPerson);
			node.setTail(list);
			list = node;
		}
		return list;
	}
}
